---
title: "Replication Code for How Governance and Disasters Shape Renewable Energy"
author: "Timothy Fraser, Northeastern University"
date: "Updated March 2, 2020"
output:
  html_document:
    df_print: paged
    toc: true
    
---

This document replicates and revises the R script for the studied, "How Governance and Disasters shape Renewable Energy Transitions: The case of Japanese mega-solar," which was conducted in August of 2018 and published in 2019 in Social Science Quarterly.

Recently, it came to my attention that I made an error in my coding for multiple imputation. As a result, I have updated the analysis, using mean imputation instead. Mean imputation is a fairly conservative way of replacing missing data, because it biases *against* small trends in your data. As a result, any statistically significant effects that we find tend to be strong trends in the data. This analysis confirms that the effects discussed in the paper are still consistent with the corrected, mean-imputed models.

Below, I exhibit my code and describe and explain its outputs, step by step.

# 1. Import Data and Load Packages

```{r, message = FALSE, waring = FALSE}
# Load packages
library(tidyverse) # for data visualization and wrangling
library(car) # for multicolinearity measures
library(BaylorEdPsych) # for GLM goodness of fit measures
library(gtools) # for getting stars from pvalues easily

# also please install pcsl and mfx, but don't load them 
# (they require MASS, which conflicts with tidyverse's select function)

# Import dataset
dat <- readxl::read_excel("Database_prefectural_analysis_R_relevant_variables.xlsx") %>%
  select(
    # Import basic identifying information    
    muni_code = Code, 
    muni = Municipality_Japanese, 
    pref = Prefecture_English, 
    utility = Regional_Power_Company,
    
    # Import number of solar farms of various sizes, 
    # and rename them to easily understandable names
    sp_total = SP, 
    sp_small = SP_10_49_kW, 
    sp_medium = SP_50_499_kW, 
    sp_large = SP_500_1999kW,
    sp_extralarge = `SP_2000_kW+`,
    
    # Import predictors
    pv_output = PV_output_2018, 
    area = Area_inhabitable_2010, 
    pop = Population_2010,
    employment_energy = PercentEmployment_EGHW_2009, 
    land_prices = Land_price_residential_2009, 
    crime_rates = Crime_Rate_2008, 
    voter_turnout = voter_turnout_2012,
    winning_party_voteshare = ku_LDP_voteshare_2012,
    unemployment = Unemployment_2010, 
    income_per_capita = Income_taxable_per_capita_2010,
    ratio_revs_exp = Ratio_Revs_Exp_2010, 
    carbon_emissions_change = PercentChange_CO2_industry_2000_03,
    deaths = death11, 
    damages = damaged11, 
    exclusion_zone = Fukushima_Exclusion_Zone) %>%
  # Convert categorical variables to factors, which will preserve their levels in models
  mutate(pref = as.factor(pref),
         exclusion_zone = factor(exclusion_zone, levels = c("1", "0")) )

```



```{r}
# Calculate missing data (no variables over 5%)
dat %>%
  select(-c(muni_code, muni,pref,utility,exclusion_zone)) %>%  
  summarise_all(funs(sum(is.na(.)) / n() * 100 )) %>%
  t() %>% as.data.frame() %>% rename(`(%) Missing` = V1)
```



```{r}
# Remove outcome values that are 0, so that we can model just the positive values

# Let's write a quick function so that when we scale our predictors,
# they come out as simple vectors, not matrices.
scale_fixed = function(x){
  scale(x) %>% 
    as.data.frame() %>% 
    unlist() %>% 
    return()}

dat.count = dat %>%
  mutate_at(
    # exclude categories and outcome variables;
    # (note: no outcomes are missing data)
    vars(-c(muni_code, muni, pref, utility, exclusion_zone,
            sp_total, sp_small, sp_medium, sp_large, sp_extralarge)),
    # Replace missing data points in all predictors with the mean of the predictor
    funs(if_else(is.na(.), true = mean(., na.rm = TRUE), false = . ))) %>%
  # Now scale all predictors, so they are mean centered at 0
  # Doing so will allow us to compare the effects of numeric predictors
  mutate_at(
    vars(-c(muni_code, muni, pref, utility, exclusion_zone,
            sp_total, sp_small, sp_medium, sp_large, sp_extralarge)),
    funs(scale_fixed)) %>%
  # In order to use negative binomial models, let's left censor the data
  # so that all our zeros are replaced with NAs.
  mutate(
    sp_total = if_else(sp_total == 0, NA_real_, sp_total),
    sp_small = if_else(sp_small == 0, NA_real_, sp_small),
    sp_medium = if_else(sp_medium == 0, NA_real_, sp_medium),
    sp_large = if_else(sp_large == 0, NA_real_, sp_large),
    sp_extralarge = if_else(sp_extralarge == 0, NA_real_, sp_extralarge))


```




# 3. Dispersion Tests

```{r}

# Is our data overdispersed?
dat %>%
  summarize(
    disp_total = var(sp_total) / mean(sp_total),
    disp_small = var(sp_small) / mean(sp_small),
    disp_medium = var(sp_medium) / mean(sp_medium),
    disp_large = var(sp_large) / mean(sp_large),
    disp_extralarge = var(sp_extralarge) / mean(sp_extralarge)) %>%
  pivot_longer(
    cols = -c(),
    names_to = "type",
    names_prefix = "disp_",
    values_to = "dispersion")
# Quite!
# Looks like the negative binomial model will probably be a better fit than poisson models.

```


# 4. Modeling

## 4.1 Total Solar Adoption

```{r}
# Run a negative binomial model, using robust standard errors with marginal
nb.sp_total <- dat.count %>% 
  mfx::negbinmfx(formula = sp_total ~ pv_output + area + pop +
                        employment_energy + land_prices + 
                        crime_rates + winning_party_voteshare + 
                        unemployment + income_per_capita +
                        ratio_revs_exp + carbon_emissions_change +
                        deaths + damages + exclusion_zone + pref, robust = TRUE)

# Run a poisson model
p.sp_total <- dat.count %>% 
  mfx::poissonmfx(formula = sp_total ~ pv_output + area + pop +
                        employment_energy + land_prices + 
                        crime_rates + winning_party_voteshare + 
                        unemployment + income_per_capita +
                        ratio_revs_exp + carbon_emissions_change +
                        deaths + damages + exclusion_zone + pref, robust = TRUE)

# Does the Negative Binomial Model significantly improve upon the poisson?
data.frame(
  loglik_nb = logLik(nb.sp_total$fit),
  loglik_poisson = logLik(p.sp_total$fit)) %>%
  mutate(p.value = pchisq(2 * (loglik_nb - loglik_poisson), df = 1, lower.tail = FALSE))
```

Let's evaluate a Vuong test, which tests model fit using the AIC of both models.

```{r}
# This test apparently uses a lot of computational power;
# had to suppress it in order to write to pdf.
# If you run it, you will find that all vuong test results are statistically significant 
# below p < 0.001, and they all favor the negative binomial model.
# pscl::vuong(nb.sp_total$fit, p.sp_total$fit)
```

Looks like the negative binomial model has a statistically significant, better fit.


```{r}
# Prefectures generate high VIFs for GVIF, but the appropriate way to compare categorical and numeric variables
# is instead to square the GVIF(1/2*DF), and compare those numbers.
# Very, very low VIF all around.
nb.sp_total$fit %>% vif()
```

```{r}
# Pretty good Nagelkerke's R2 - 0.77
nb.sp_total$fit %>% PseudoR2()
```


\newpage


## 4.2 Small Solar Farm Adoption

```{r}
# Run a negative binomial model, using robust standard errors with marginal
nb.sp_small <- dat.count %>% 
  mfx::negbinmfx(formula = sp_small ~ pv_output + area + pop +
                        employment_energy + land_prices + 
                        crime_rates + winning_party_voteshare + 
                        unemployment + income_per_capita +
                        ratio_revs_exp + carbon_emissions_change +
                        deaths + damages + exclusion_zone + pref, robust = TRUE)

# Run a poisson model
p.sp_small <- dat.count %>% 
  mfx::poissonmfx(formula = sp_small ~ pv_output + area + pop +
                        employment_energy + land_prices + 
                        crime_rates + winning_party_voteshare + 
                        unemployment + income_per_capita +
                        ratio_revs_exp + carbon_emissions_change +
                        deaths + damages + exclusion_zone + pref, robust = TRUE)

# Does the Negative Binomial Model significantly improve upon the poisson?
data.frame(
  loglik_nb = logLik(nb.sp_small$fit),
  loglik_poisson = logLik(p.sp_small$fit)) %>%
  mutate(p.value = pchisq(2 * (loglik_nb - loglik_poisson), df = 1, lower.tail = FALSE))
```

Let's evaluate a Vuong test, which tests model fit using the AIC of both models.

```{r}
# This test apparently uses a lot of computational power;
# had to suppress it in order to write to pdf.
# Same as above.
# pscl::vuong(nb.sp_small$fit, p.sp_small$fit)
```

Looks like the negative binomial model has a statistically significant, better fit.


```{r}
# Prefectures generate high VIFs for GVIF, but the appropriate way to compare categorical and numeric variables
# is instead to square the GVIF(1/2*DF), and compare those numbers.
# Very, very low VIF all around.
nb.sp_small$fit %>% vif()
```

```{r}
# Pretty good Nagelkerke's R2!
nb.sp_small$fit %>% PseudoR2()
```



\newpage

## 4.3 Medium Solar Farm Adoption

```{r}
# Run a negative binomial model, using robust standard errors with marginal
nb.sp_medium <- dat.count %>% 
  mfx::negbinmfx(formula = sp_medium ~ pv_output + area + pop +
                        employment_energy + land_prices + 
                        crime_rates + winning_party_voteshare + 
                        unemployment + income_per_capita +
                        ratio_revs_exp + carbon_emissions_change +
                        deaths + damages + exclusion_zone + pref, robust = TRUE)

# Run a poisson model
p.sp_medium <- dat.count %>% 
  mfx::poissonmfx(formula = sp_medium ~ pv_output + area + pop +
                        employment_energy + land_prices + 
                        crime_rates + winning_party_voteshare + 
                        unemployment + income_per_capita +
                        ratio_revs_exp + carbon_emissions_change +
                        deaths + damages + exclusion_zone + pref, robust = TRUE)

# Does the Negative Binomial Model significantly improve upon the poisson?
data.frame(
  loglik_nb = logLik(nb.sp_medium$fit),
  loglik_poisson = logLik(p.sp_medium$fit)) %>%
  mutate(p.value = pchisq(2 * (loglik_nb - loglik_poisson), df = 1, lower.tail = FALSE))
```

Let's evaluate a Vuong test, which tests model fit using the AIC of both models.

```{r}
# This test apparently uses a lot of computational power;
# had to suppress it in order to write to pdf.
# Same as above.
# pscl::vuong(nb.sp_medium$fit, p.sp_medium$fit)
```

Looks like the negative binomial model has a statistically significant, better fit.

```{r}
# Prefectures generate high VIFs for GVIF, but the appropriate way to compare categorical and numeric variables
# is instead to square the GVIF(1/2*DF), and compare those numbers.
# Very, very low VIF all around.
nb.sp_medium$fit %>% vif()
```

```{r}
# Pretty good Nagelkerke's R2!
nb.sp_medium$fit %>% PseudoR2()
```



\newpage

## 4.4 Large Solar Farm Adoption

```{r}
# Run a negative binomial model, using robust standard errors with marginal
nb.sp_large <- dat.count %>% 
  mfx::negbinmfx(formula = sp_large ~ pv_output + area + pop +
                        employment_energy + land_prices + 
                        crime_rates + winning_party_voteshare + 
                        unemployment + income_per_capita +
                        ratio_revs_exp + carbon_emissions_change +
                        deaths + damages + exclusion_zone + pref, robust = TRUE)

# Run a poisson model
p.sp_large <- dat.count %>% 
  mfx::poissonmfx(formula = sp_large ~ pv_output + area + pop +
                        employment_energy + land_prices + 
                        crime_rates + winning_party_voteshare + 
                        unemployment + income_per_capita +
                        ratio_revs_exp + carbon_emissions_change +
                        deaths + damages + exclusion_zone + pref, robust = TRUE)

# Does the Negative Binomial Model significantly improve upon the poisson?
data.frame(
  loglik_nb = logLik(nb.sp_large$fit),
  loglik_poisson = logLik(p.sp_large$fit)) %>%
  mutate(p.value = pchisq(2 * (loglik_nb - loglik_poisson), df = 1, lower.tail = FALSE))
```

Let's evaluate a Vuong test, which tests model fit using the AIC of both models.

```{r}
# This test apparently uses a lot of computational power;
# had to suppress it in order to write to pdf.
# Same as above.
# pscl::vuong(nb.sp_large$fit, p.sp_large$fit)
```

Looks like the negative binomial model has a statistically significant, better fit.

```{r}
# Prefectures generate high VIFs for GVIF, but the appropriate way to compare categorical and numeric variables
# is instead to square the GVIF(1/2*DF), and compare those numbers.
# Very, very low VIF all around.
nb.sp_large$fit %>% vif()
```

```{r}
# Pretty good Nagelkerke's R2!
nb.sp_large$fit %>% PseudoR2()
```





\newpage

## 4.5 Extra-Large Solar Farm Adoption

```{r}
# Run a negative binomial model, using robust standard errors with marginal
nb.sp_extralarge <- dat.count %>% 
  mfx::negbinmfx(formula = sp_extralarge ~ pv_output + area + pop +
                        employment_energy + land_prices + 
                        crime_rates + winning_party_voteshare + 
                        unemployment + income_per_capita +
                        ratio_revs_exp + carbon_emissions_change +
                        deaths + damages + exclusion_zone + pref, robust = TRUE)

# Run a poisson model
p.sp_extralarge <- dat.count %>% 
  mfx::poissonmfx(formula = sp_extralarge ~ pv_output + area + pop +
                        employment_energy + land_prices + 
                        crime_rates + winning_party_voteshare + 
                        unemployment + income_per_capita +
                        ratio_revs_exp + carbon_emissions_change +
                        deaths + damages + exclusion_zone + pref, robust = TRUE)

# Does the Negative Binomial Model significantly improve upon the poisson?
data.frame(
  loglik_nb = logLik(nb.sp_extralarge$fit),
  loglik_poisson = logLik(p.sp_extralarge$fit)) %>%
  mutate(p.value = pchisq(2 * (loglik_nb - loglik_poisson), df = 1, lower.tail = FALSE))
```

Let's evaluate a Vuong test, which tests model fit using the AIC of both models.

```{r}
# This test apparently uses a lot of computational power;
# had to suppress it in order to write to pdf.
# Same as above.
#pscl::vuong(nb.sp_extralarge$fit, p.sp_extralarge$fit)
```

Looks like the negative binomial model has a statistically significant, better fit.

```{r}
# Prefectures generate high VIFs for GVIF, but the appropriate way to compare categorical and numeric variables
# is instead to square the GVIF(1/2*DF), and compare those numbers.
# Very, very low VIF all around.
nb.sp_extralarge$fit %>% vif()
```

```{r}
# Pretty good Nagelkerke's R2!
nb.sp_extralarge$fit %>% PseudoR2()
```


```{r}
# Save models
save(nb.sp_total, nb.sp_small, nb.sp_medium, nb.sp_large, nb.sp_extralarge,
     p.sp_total, p.sp_small, p.sp_medium, p.sp_large, p.sp_extralarge,
     file = "models.RData")
# Remove models, getting rid of poisson models for good.
remove(nb.sp_total, nb.sp_small, nb.sp_medium, nb.sp_large, nb.sp_extralarge,
       p.sp_total, p.sp_small, p.sp_medium, p.sp_large, p.sp_extralarge)
```


# 5. Model Table

## 5.1 Main Predictors Model Table

```{r, results = "asis", echo = FALSE}
# Load model data
load("models.RData")

# Plot marginal effects    
texreg::htmlreg(
  list(nb.sp_total, nb.sp_small, nb.sp_medium, nb.sp_large, nb.sp_extralarge),
  bold = 0.05,
  stars = c(0.001, 0.01, 0.05),
  align.center=TRUE,
  custom.model.names = c("Total Solar", "Small (10-49 kW)", 
                         "Medium (50-499 kW)", "Large (500-1999 kW)", 
                         "Extra-Large (+2000 kW)"),
  custom.coef.map = list(
    "pv_output" = "Photovoltaic Output (2018)",
    "area" = "Inhabitable area (2010)",
    "pop" = "Population (2010)",
    "employment_energy" = "% Employment in electricity, gas, water, or heating (2009)",
    "crime_rates" = "Crime Rate (2008)",
    "winning_party_voteshare" = "% Liberal Democratic Party (LDP) vote share in 2012 Upper House election",
    "unemployment" = "Unemployment Rate (2010)",
    "income_per_capita" = "Taxable Income per capita (2010)",
    "ratio_revs_exp" = "Local government revenues to expenditures ratio (2010)",
    "carbon_emissions_change" = "% Increase in Industrial CO2 emissions (2000-2003)",
    "deaths" = "Deaths in 2011 disaster",
    "damages" = "Damaged properties in 2011 disaster",
    "exclusion_zone0" = "Fukushima Exclusion Zone"),
  omit.coef = "pref",
  custom.note = "*** denotes statistically significant results at p < 0.001, ** at p < 0.01, and * at 0 < 0.05. Estimates are marginal effects, read as the increase in actual solar farms given an increase of one standard deviation in the predictor.",
)
```




## 5.2 Prefectures Model Table

```{r, results = "asis", echo = FALSE}

# Plot marginal effects    
texreg::htmlreg(
  list(nb.sp_total, nb.sp_small, nb.sp_medium, nb.sp_large, nb.sp_extralarge),
  bold = 0.05,
  stars = c(0.001, 0.01, 0.05),
  align.center=TRUE,
  custom.model.names = c("Total Solar", "Small (10-49 kW)", 
                         "Medium (50-499 kW)", "Large (500-1999 kW)", 
                         "Extra-Large (+2000 kW)"),
  custom.coef.map = list(
   "prefHokkaido" = "Hokkaido",
    "prefAkita" = "Akita",
    "prefAomori" = "Aomori",
    "prefIwate" = "Iwate",
  "prefMiyagi" = "Miyagi",
    "prefFukushima" = "Fukushima",
     "prefYamagata" = "Yamagata",
   "prefIbaraki" = "Ibaraki",
  "prefTochigi" = "Tochigi",
  "prefGunma" = "Gunma",
   "prefSaitama" = "Saitama",
    "prefChiba" = "Chiba",
  "prefKanagawa" = "Kanagawa",
  "prefTokyo" = "Tokyo",
   "prefNiigata" = "Niigata",
   "prefNagano" = "Nagano",
   "prefYamanashi" = "Yamanashi",
  "prefShizuoka" = "Shizuoka",
   "prefToyama" = "Toyama",
   "prefIshikawa" = "Ishikawa",
    "prefFukui" = "Fukui",
    "prefGifu" = "Gifu",
   "prefAichi" = "Aichi",
   "prefShiga" = "Shiga",
  
  "prefMie" = "Mie",
  "prefKyoto" = "Kyoto",
  "prefNara" = "Nara",
  "prefWakayama" = "Wakayama",
  "prefOsaka" = "Osaka",
  "prefHyogo" = "Hyogo",
   "prefTokushima" = "Tokushima",
     "prefKagawa" = "Kagawa",
   "prefKochi" = "Kochi",
    "prefEhime" = "Ehime",
  "prefOkayama" = "Okayama",
  "prefTottori" = "Tottori",
   "prefShimane" = "Shimane",
   "prefHiroshima" = "Hiroshima",
   "prefYamaguchi" = "Yamaguchi",
    "prefFukuoka" = "Fukuoka",
   "prefOita" = "Oita",
     "prefSaga" = "Saga",
   "prefNagasaki" = "Nagasaki",
   "prefKumamoto" = "Kumamoto",
   "prefMiyazaki" = "Miyazaki",
   "prefKagoshima" = "Kagoshima",
   "prefOkinawa" = "Okinawa"),
  # Omit any coefficients that begin with this
  omit.coef = "pv|area|pop|income|local|vote|employ|carbon|deaths|damage|zone|land|crime|ratio",
  
  custom.note = "*** denotes statistically significant results at p < 0.001, ** at p < 0.01, and * at 0 < 0.05. Estimates are marginal effects, read as the increase in actual solar farms given an increase of one standard deviation in the predictor.",
)
```



## 5.3 Model Statistics Table


```{r}
load("models.RData")
# Let's write functions to gather several model statistics:

  
# Number of Cases
samplesize = function(model){
  nobs(model$fit) %>% 
    as.data.frame() %>%
    select(sample = 1) %>%
    return()
}

# Mean VIF
gvif = function(model){
model$fit %>%
  # Get the variance inflation factors
  vif() %>% data.frame() %>% 
  # Grab the generalized variance inflation factors, raised to the 1/(2 * df) power,
    # This measure is comparable among both categorical and numeric variables
  select(gvif1_2 = 3) %>%
  # This measure can then be squared to get a measure along the same scale of the original VIF
  # as described by Fox & Monette
  summarize(`Mean VIF` = mean(gvif1_2^2)) %>%
    return()
}

# Pseudo R2
nagelkerke = function(model){
model$fit %>% PseudoR2() %>%
    as.data.frame() %>%
    slice(4) %>%
  select(`Nagelkerke's Pseudo R2` = 1) %>% 
    return()
}


# AIC
aic = function(model){
  model$fit %>% 
    AIC() %>% 
    as.data.frame() %>% 
    select(`AIC` = 1) %>%
    return()
}


gof = bind_cols(

  # Model Name
  data.frame(Model = c("Total Solar", "Small Solar", 
                       "Medium Solar", "Large Solar", 
                       "Extra-Large Solar")),
  
  # Sample Size
  list(nb.sp_total, nb.sp_small, nb.sp_medium, nb.sp_large, nb.sp_extralarge) %>%
    lapply(samplesize) %>% bind_rows(),
  # GVIF
  list(nb.sp_total, nb.sp_small, nb.sp_medium, nb.sp_large, nb.sp_extralarge) %>%
    lapply(gvif) %>% bind_rows(),
  # Nagelkerke's R2
  list(nb.sp_total, nb.sp_small, nb.sp_medium, nb.sp_large, nb.sp_extralarge) %>%
    lapply(nagelkerke) %>% bind_rows(),
  # AIC
  list(nb.sp_total, nb.sp_small, nb.sp_medium, 
       nb.sp_large, nb.sp_extralarge) %>%
    lapply(aic) %>% bind_rows(),
  
  # Chi Square Test P-value
  # How much does Negative Binomial improve upon Poisson Model?
  bind_rows(
    
    data.frame(
      loglik_nb = logLik(nb.sp_total$fit),
      loglik_poisson = logLik(p.sp_total$fit)) %>%
      mutate(p.value = pchisq(2 * (loglik_nb - loglik_poisson), 
                              df = 1, lower.tail = FALSE)) %>%
      select(p.value),
    
    data.frame(
      loglik_nb = logLik(nb.sp_small$fit),
      loglik_poisson = logLik(p.sp_small$fit)) %>%
      mutate(p.value = pchisq(2 * (loglik_nb - loglik_poisson), 
                              df = 1, lower.tail = FALSE)) %>%
      select(p.value),
    
    data.frame(
      loglik_nb = logLik(nb.sp_medium$fit),
      loglik_poisson = logLik(p.sp_medium$fit)) %>%
      mutate(p.value = pchisq(2 * (loglik_nb - loglik_poisson), 
                              df = 1, lower.tail = FALSE)) %>%
      select(p.value),
    
    data.frame(
      loglik_nb = logLik(nb.sp_large$fit),
      loglik_poisson = logLik(p.sp_large$fit)) %>%
      mutate(p.value = pchisq(2 * (loglik_nb - loglik_poisson), 
                              df = 1, lower.tail = FALSE)) %>%
      select(p.value),
    
    data.frame(
      loglik_nb = logLik(nb.sp_extralarge$fit),
      loglik_poisson = logLik(p.sp_extralarge$fit)) %>%
      mutate(p.value = pchisq(2 * (loglik_nb - loglik_poisson), 
                              df = 1, lower.tail = FALSE)) %>%
      select(p.value)))
  
# View goodness of fit statistics
gof
```


```{r}
# Remove excess data
remove(p.sp_extralarge, p.sp_large, p.sp_medium, p.sp_small, p.sp_total, 
       gof, aic, gvif, nagelkerke)
```


# 6. Visualizations

Finally, let's visualize the most statistically significant effects.

First, we'll write a function to generate mean values for most predictors, off of which we can predict solar adoption.

```{r, message=FALSE, warning = FALSE}
# What if we could see how changes in our independent variables 
# affect the predicted amount of solar we receive?

# To do so, let's create a fake dataset
# where all continuous variables are held at their mean,
# and the prefecture category is set to just one value, 
# except for one key variable we wish to see change in

sim = function(variable){
  
# Find the min and max of the variable you want to assess
bounds = dat.count %>%
  select(!!sym(variable)) %>%
  summarize(min = min(!!sym(variable)),
            max = max(!!sym(variable)))


# Create a data.frame with a range of values within those bounds
data.frame(
  id = variable,
  # If you choose, you can use those bounds to define the prediction
  # variable = seq(bounds$min, bounds$max, length.out = 100)) %>%
  # Or, you can set them to a more meaningful deviation around the mean, like 2 standard deviations
  variable = seq(-2, 2, length.out = 100)) %>%
  rename(!!sym(variable) := variable) %>%
  # Join in the mean of all variables in the model
  left_join(by = "id",
            y = dat.count %>%
              summarize_at(
                # Calculate the mean for all except these variables
                vars(-c(muni_code, muni, pref, utility, exclusion_zone,
                        sp_total, sp_small, sp_medium, 
                        sp_large, sp_extralarge, 
                        !!sym(variable))),
                funs(mean)) %>%
              # Be sure to add in a prefecture
              mutate(pref = factor("Saitama", levels = levels(dat.count$pref)),
                     exclusion_zone = factor("0", levels = c("1" ,"0"))) %>%
              # And add an ID - the name of the variable that was varied - to complete the join
              mutate(id = variable)) %>%
  return()
}
```


Generate data, varying by crime rate, damages, and unemployment, and predict the number of solar farms adopted.

```{r, message = FALSE, warning = FALSE}
# Now, let's simulate varying value for crime rates, etc.
simdata = c("crime_rates", "damages", "unemployment") %>%
  lapply(sim) %>% bind_rows()

# Write a function to predict solar farms
predict.model = function(model){
  predict(model$fit, 
          newdata = simdata, 
          type = "response",  
          se.fit = TRUE) %>%
    as.data.frame() %>%
    # Bind in simulated data
    bind_cols(simdata) %>%
    # Add dependent variable of model
    mutate(model = names(model$fit$model)[1])
}


# Generate predictions 
pred = list(nb.sp_total, nb.sp_small, nb.sp_medium,
            nb.sp_large, nb.sp_extralarge) %>%
  lapply(predict.model) %>% 
  bind_rows() %>%
  # Pivot each predictor into a single column
  pivot_longer(
    cols = c(crime_rates, damages, unemployment),
    names_to = "predictor",
    values_to = "value") %>%
  # Then keep only the predictor values from each one's predictions
  # (we don't want to plot the mean values of that predictor)
  filter(predictor == id) %>%
  # Now identify each predicted value as significant or not
  # Sadly, we must do this manually, 
  # specifying for which models which variables were significant.
  mutate(significant = if_else(
      # Damage was significant for total and small
      model %in% c("sp_total", "sp_small") & predictor == "damages", 1, 
    if_else(
      # Unemployment was significant for all but extra-large
      !model %in% c("sp_extralarge") & predictor == "unemployment", 1,
      if_else(
         # Crime rates were significant for small and medium
        model %in% c("sp_small", "sp_medium") & predictor == "crime_rates", 1, 0)))) %>%
  filter(significant == 1) %>%
  # Relabel and arrange labels as we want to see them in the chart
  mutate(
    Outcome = model %>% recode_factor(
    "sp_total" = "Total Solar Farms",
    "sp_small" = "Small Solar Farms",
    "sp_medium" = "Medium Solar Farms",
    "sp_large" = "Large Solar Farms",
    "sp_extralarge" = "Extra-Large Solar Farms"),
    predictor = predictor %>% recode_factor(
      "crime_rates" = "Crime Rate",
      "damages" = "Disaster Damage",
      "unemployment" = "Unemployment Rate")) 

```


Now visualize. The x-axis will have to be in terms of the scaled independent variables, such that an increase in 1 on the axis is an increase in that variable by 1 standard deviation from the mean.

```{r}
# Visualize
pred %>%
  ggplot(mapping = aes(x = value, y = fit,
                       ymin = fit - se.fit*1.96, 
                       ymax = fit + se.fit*1.96,
                       fill = Outcome,
                       linetype = Outcome)) +
  scale_fill_grey() +
  geom_ribbon(alpha = 0.75) +
  geom_line(color = "black") +
  theme_minimal() +
  facet_wrap(~predictor,scales = "free") +
  labs(x = "Increase in Predictor
       (Standard Deviations Greater than Mean)",
       y = "Predicted Solar Farms",
       title = "Significant Effects on Solar Adoption by Size")
  
```
